IMPORT MODULES

Python modules used.

In [1]:
if True:
    import pandas as pd
    import plotly.graph_objects as go
    import plotly.express as px
    import plotly.offline as py
    import datetime
    import numpy as np
    import math
    import reverse_geocoder as rg
    from scipy.stats import norm
    from scipy.stats import expon
    from scipy.stats import lognorm
    from scipy.stats import weibull_min
    from scipy.stats import weibull_max
    from scipy.stats import alpha
    from scipy.stats import beta
    import scipy.stats as stats
    %matplotlib inline
    from scipy import stats
    from fitter import Fitter
    from IPython.display import Image
    #plotly.offline.init_notebook_mode(connected=True)
    py.init_notebook_mode(connected=False)
    #ignore deprication warnings
    import warnings
    warnings.filterwarnings('ignore')
    import operator
    import collections
    from scipy.optimize import minimize
In [2]:
def objfun(x,*args):
    #x = params
    params=x
    #*args = dist, data
    dist = args[0]
    data = args[1]
    weights = args[2]
    fit = [dist.cdf(j, *params) for j in sorted(weights)]
    err = checkfit(data,fit)
    return err
def checkfit(data,fit):
    err=0
    for i in range(len(data)):
        err+=(data[i]-fit[i])**2
    return err

IMPORT DATA

For data reports: https://www.nohrsc.noaa.gov/nsa/reports.html

For definition of terms https://www.nohrsc.noaa.gov/help/

In [3]:
#read in the Surface Water Equivalent Data
SWE = pd.read_csv('swe.csv') 

DATA CLEANING

In [4]:
if True:
    # remove some unused columns
    SWE = SWE.drop(["Unnamed: 0","Unnamed: 0.1", "Unnamed: 10", "Unnamed: 9","Zip_Code"], axis=1)
    
    # Add a column that counts the number of entries from each station
    SWE['StationCounts'] = SWE.groupby(['Station_Id'])['Station_Id'].transform('count')
    # Add a column that records the time of first data for each station
    SWE['StartTime'] = SWE.groupby(['Station_Id'])['DateTime_Report(UTC)'].transform('min')

    # Make a copy of the dataframe with only unique stations for plotting on a map.
    SWE_Stations = SWE.drop_duplicates('Station_Id',keep='first')
In [5]:
#plot the stations on a map
if True:
    #fig = px.scatter_mapbox(SWE_Stations, lon="Longitude", lat="Latitude", hover_name="Station_Id", hover_data=["Station_Id","StationCounts"],
    #                        zoom=4, height=300,color='StartTime',color_continuous_scale=px.colors.sequential.Viridis)
    fig = px.scatter_mapbox(SWE_Stations, lon="Longitude", lat="Latitude", hover_name="Station_Id", hover_data=["Station_Id","StartTime"],
                            zoom=4, height=300)

    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    print("Plot of All SWE Data Sites in the Northeast")
    fig.update_layout(showlegend=False)
    fig.show()
Plot of All SWE Data Sites in the Northeast
In [6]:
#remove sites with fewer than 100 total data points
mincount = 100
if True:
    #remove the zeros
    SWE = SWE[SWE.Amount>0]

    # add columns for year,month,day
    SWE['year'] = pd.DatetimeIndex(SWE['DateTime_Report(UTC)']).year
    SWE['month'] = pd.DatetimeIndex(SWE['DateTime_Report(UTC)']).month
    SWE['day'] = pd.DatetimeIndex(SWE['DateTime_Report(UTC)']).day

    # throw away the stations with less than 'mincount' data points?
    SWE = SWE[SWE['StationCounts']>mincount]
    SWE_Stations = SWE.drop_duplicates('Station_Id',keep='first')
In [7]:
#drop all stations outside of MA
if True:
    SWE_Stations = SWE_Stations[SWE_Stations['Latitude']<42.8]
    SWE_Stations = SWE_Stations[SWE_Stations['Latitude']>41.9]
    SWE_Stations = SWE_Stations[SWE_Stations['Longitude']>-73]
if False:
    #THIS TAKES A WHILE, ONLY USED FOR SHOWING THE MAP PLOT OF STATIONS
    # uses reverse geocoding..
    # not really necessary here, just use coordinates.
    rows_to_delete=[]
    for i, row in enumerate(SWE_Stations.itertuples(), 1):
        #print(i,row.Index)
        coords = (SWE_Stations.iloc[i-1]['Latitude'],SWE_Stations.iloc[i-1]['Longitude'])
        results = rg.search(coords)
        if results[0]['admin1'] != "Massachusetts":
            rows_to_delete.append(row.Index)
    SWE_Stations = SWE_Stations.drop(rows_to_delete)
In [8]:
#add columns for the initial date of each station
SWE_Stations['StartDate'] = pd.to_datetime(SWE_Stations['StartTime'])
SWE_Stations['epoch'] = pd.to_numeric(SWE_Stations['StartDate'])
In [9]:
#plot the stations on a map
if True:
    mapbox_access_token = 'pk.eyJ1IjoidGZnMjUwIiwiYSI6ImNrM2FoOXJvMjBjaHEzZHJ0MjdoczNxczUifQ.NdBJ3-eSMYknzg_vfPm0Qg'
    length = max(SWE_Stations['epoch']) - min(SWE_Stations['epoch'])
    tickvals = [min(SWE_Stations['epoch']) + (length/12)*i for i in range(0,13)]
    ticktext = list(range(2007,2020))
    txt = ["Station_Id: "+str(i)+"<br>" +"StationCounts: "+ str(j) for i, j in zip(list(SWE_Stations['Station_Id']), list(SWE_Stations['StationCounts']))]
    fig = go.Figure()
    fig.add_trace(go.Scattermapbox(text = txt,hoverinfo = 'text',lon=SWE_Stations["Longitude"],
            lat=SWE_Stations["Latitude"],mode='markers',marker=go.scattermapbox.Marker(opacity=1,
            size=SWE_Stations['StationCounts']/40.,color=SWE_Stations['epoch'],colorscale='viridis',
            colorbar={'title':'Start Date','tickmode':'array','tickvals':tickvals,'ticktext':ticktext})))
    

    fig.update_layout(mapbox_style="open-street-map")
    fig.update_layout(hovermode='closest',
     mapbox=go.layout.Mapbox(
        accesstoken=mapbox_access_token,
        bearing=0,
        center=go.layout.mapbox.Center(
            lat=42.4,
            lon=-72),
        pitch=0,
        zoom=7))
    
    
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    print("Plot of sites in MA with at least " + str(mincount)+" Data Points")
    print("color is the start date, marker size is the number of data points")
    #fig.update_layout(showlegend=False)
    fig.show()    
Plot of sites in MA with at least 100 Data Points
color is the start date, marker size is the number of data points

GET ANNUAL SWE MAXIMUMS (For each station)

In [10]:
#GET MONTHLY SWE MAXIMUMS (For each station)
#The entire row that governs is carried forward

SELECTED_STATIONS = ['GRFM3']
#SELECTED_STATIONS = ['ABNM3']
#SELECTED_STATIONS = ['LWMM3']
#SELECTED_STATIONS = ['MILM3']
#SELECTED_STATIONS = ['HRKM3']
#SELECTED_STATIONS = ['ASHM3']
#SELECTED_STATIONS = ['NRNM3']
#SELECTED_STATIONS = ['GRFM3','ABNM3','LWMM3','MILM3','HRKM3','ASHM3','NRNM3']
#SELECTED_STATIONS = list(SWE_Stations['Station_Id'])

if True:
    MONTHLY_DATA=pd.DataFrame() 
    ANNUAL_DATA=pd.DataFrame() 

    #for all stations?
    #for station in set(SWE_Stations['Station_Id']):

    #select the station with the most data       
    #for station in SELECTED_STATIONS:
    if True:
        for yr in range(2006,2020):
            tmp1 = SWE[(SWE['year']==yr) & (SWE.Station_Id.isin(SELECTED_STATIONS))] 
            try:
                ANNUAL_DATA = ANNUAL_DATA.append(tmp1.loc[tmp1['Amount'].idxmax()])
            except:
                pass

            for month in range(1,13):
                tmp2 = SWE[(SWE['year']==yr) & (SWE['month']==month) & (SWE.Station_Id.isin(SELECTED_STATIONS))]
                
                try:
                    tmp2['AnnualData'] = max(max(tmp1["Amount"]),max(tmp2['Amount']))
                    MONTHLY_DATA = MONTHLY_DATA.append(tmp2.loc[tmp2['Amount'].idxmax()])
                except:
                    pass


    #limit monthly data to full years
    #MONTHLY_DATA = MONTHLY_DATA[(MONTHLY_DATA['year']>2007)]            

    #drop some columns no longer needed
    ANNUAL_DATA = ANNUAL_DATA.drop(["StationCounts","Latitude", "Longitude"], axis=1)
    MONTHLY_DATA = MONTHLY_DATA.drop(["StationCounts","Latitude", "Longitude"], axis=1)
In [11]:
#ANNUAL_DATA
In [12]:
#for the purposes of plotting put in zeros for months without data
if True:
    for yr in range(2007,2020):
        try:
            tmp1 = ANNUAL_DATA[(ANNUAL_DATA['year']==yr)]
            #print(tmp1,"\n\n")
            for month in range(1,13):
                tmp = MONTHLY_DATA[(MONTHLY_DATA['year']==yr) & (MONTHLY_DATA['month']==month)]
                #print(tmp.shape[0])
                if tmp.shape[0]==0:
                    #add a zero.
                    #'2006-11-25 16'
                    dfadd = pd.DataFrame([[yr,month,15,0,str(yr)+"-"+"%02i" %month+"-01 00",float(list(tmp1['Amount'])[0])]], columns = ['year','month','day','Amount','DateTime_Report(UTC)','AnnualData'])
                    MONTHLY_DATA=MONTHLY_DATA.append(dfadd)
        except:
            pass
    MONTHLY_DATA = MONTHLY_DATA.sort_values(by=['DateTime_Report(UTC)'])
In [13]:
#MONTHLY_DATA
In [14]:
#Plot the data over time
if True:
    fig = go.Figure()
    fig.add_trace(go.Line(x=MONTHLY_DATA['DateTime_Report(UTC)'], y=MONTHLY_DATA['Amount'],
                        mode='lines',marker=dict(color='red',size=8),line_shape='hvh',
                        name='Monthly Maximum Data'))
    fig.add_trace(go.Line(x=MONTHLY_DATA['DateTime_Report(UTC)'], y=MONTHLY_DATA['AnnualData'],
                        mode='lines',marker=dict(color='black',size=8),line_shape='hvh',
                        name='Annual Maximum Data'))

    fig.update_layout(
        title="Annual & Monthly Maximum SWE Data",
        xaxis_title="Date",
        yaxis_title="SWE (inches of water)",
        template='plotly_white')
    fig.show() 
In [15]:
#remove the zeros
if True:
    ANNUAL_DATA = ANNUAL_DATA[ANNUAL_DATA.Amount>0]
    MONTHLY_DATA = MONTHLY_DATA[MONTHLY_DATA.Amount>0]

FIT A DISTRIBUTION (ANNUAL DATA)

Set up the fitting

In [16]:
if True:
    swe_vals_annual = sorted(list(ANNUAL_DATA["Amount"]))
    h20 = 5.202288 #psf per inch of depth
    weight_vals_annual = [i*h20 for i in swe_vals_annual]
    n=len(weight_vals_annual)

Solve for distribution parameters using python "fitter" module

https://pythonhosted.org/fitter/

In [17]:
if True:
    data_array = np.asarray(weight_vals_annual)
    f = Fitter(data_array,verbose=False)
    f.fit()
    f.summary()

Show best fit distributions

In [18]:
showbest = 5
if True:
    DISTRIBUTIONS = [stats.alpha,stats.anglit,stats.arcsine,stats.argus,stats.beta,stats.betaprime,stats.bradford,stats.burr,stats.burr12,stats.cauchy,stats.chi,stats.chi2,stats.cosine,stats.crystalball,stats.dgamma,stats.dweibull,stats.erlang,stats.expon,stats.exponnorm,stats.exponpow,stats.exponweib,stats.f,stats.fatiguelife,stats.fisk,stats.foldcauchy,stats.foldnorm,stats.frechet_l,stats.frechet_r,stats.gamma,stats.gausshyper,stats.genexpon,stats.genextreme,stats.gengamma,stats.genhalflogistic,stats.genlogistic,stats.gennorm,stats.genpareto,stats.gilbrat,stats.gompertz,stats.gumbel_l,stats.gumbel_r,stats.halfcauchy,stats.halfgennorm,stats.halflogistic,stats.halfnorm,stats.hypsecant,stats.invgamma,stats.invgauss,stats.invweibull,stats.johnsonsb,stats.johnsonsu,stats.kappa3,stats.kappa4,stats.ksone,stats.kstwobign,stats.laplace,stats.levy,stats.levy_l,stats.levy_stable,stats.loggamma,stats.logistic,stats.loglaplace,stats.lognorm,stats.lomax,stats.maxwell,stats.mielke,stats.moyal,stats.nakagami,stats.ncf,stats.nct,stats.ncx2,stats.norm,stats.norminvgauss,stats.pareto,stats.pearson3,stats.powerlaw,stats.powerlognorm,stats.powernorm,stats.rayleigh,stats.rdist,stats.recipinvgauss,stats.reciprocal,stats.rice,stats.rv_continuous,stats.rv_histogram,stats.semicircular,stats.skewnorm,stats.t,stats.trapz,stats.triang,stats.truncexpon,stats.truncnorm,stats.tukeylambda,stats.uniform,stats.vonmises,stats.vonmises_line,stats.wald,stats.weibull_max,stats.weibull_min,stats.wrapcauchy]
    DIST_NAMES = ['alpha','anglit','arcsine','argus','beta','betaprime','bradford','burr','burr12','cauchy','chi','chi2','cosine','crystalball','dgamma','dweibull','erlang','expon','exponnorm','exponpow','exponweib','f','fatiguelife','fisk','foldcauchy','foldnorm','frechet_l','frechet_r','gamma','gausshyper','genexpon','genextreme','gengamma','genhalflogistic','genlogistic','gennorm','genpareto','gilbrat','gompertz','gumbel_l','gumbel_r','halfcauchy','halfgennorm','halflogistic','halfnorm','hypsecant','invgamma','invgauss','invweibull','johnsonsb','johnsonsu','kappa3','kappa4','ksone','kstwobign','laplace','levy','levy_l','levy_stable','loggamma','logistic','loglaplace','lognorm','lomax','maxwell','mielke','moyal','nakagami','ncf','nct','ncx2','norm','norminvgauss','pareto','pearson3','powerlaw','powerlognorm','powernorm','rayleigh','rdist','recipinvgauss','reciprocal','rice','rv_continuous','rv_histogram','semicircular','skewnorm','t','trapz','triang','truncexpon','truncnorm','tukeylambda','uniform','vonmises','vonmises_line','wald','weibull_max','weibull_min','wrapcauchy']
    skips = []
    
    fitter_errors={}
    min_error=9e99
    for i in range(len(DIST_NAMES)):
        #print(DIST_NAMES[i])
        if DIST_NAMES[i] not in skips:
            fitter_errors[DIST_NAMES[i]] = f.df_errors['sumsquare_error'][DIST_NAMES[i]]
            min_error = min(min_error,f.df_errors['sumsquare_error'][DIST_NAMES[i]])
    
    ErrorThreshold = np.sort(list(fitter_errors.values()))[showbest]
    print('ErrorThreshold=',"%.4f" %ErrorThreshold)
    
    fig = go.Figure()
    n = len(weight_vals_annual)
    Pvals_annual = []
    for i in range(len(sorted(weight_vals_annual))):
        Pvals_annual.append((i+1)/(n+1))


    for i in range(len(DIST_NAMES)):
        #print(DIST_NAMES[i])
        if DIST_NAMES[i] not in skips:
            error = f.df_errors['sumsquare_error'][DIST_NAMES[i]]
            if np.isnan(error) or error>ErrorThreshold:
                visible='legendonly'
            else:
                visible=True

            try:
                name = DIST_NAMES[i]
                dist = DISTRIBUTIONS[i]
                params = f.fitted_param[name]

                yy = x=np.linspace(0.001,0.999,98)
                xx = [dist.ppf(i, *params) for i in yy]
                fig.add_trace(go.Scatter(x=xx, y=yy, line=dict(width=1),hoverinfo='name+text',
                                    mode='lines',hovertext="%1.3g" % error,visible=visible,name=name+" "+"%1.3g" % error))
            except KeyError:
                pass


    fig.add_trace(go.Scatter(x=sorted(weight_vals_annual), y=Pvals_annual,
                        mode='markers',marker=dict(color='black',size=8),
                        name='data'))

    fig.update_layout(
        title="Fitting Distributions to Annual Maximum Data Using Python FITTER Module",
        xaxis_title="Annual Maximum Measured Snow Weight (psf)",
        yaxis_title="Probability",
        template='plotly_white')
    fig.update_xaxes(range=[0, 1.5*max(weight_vals_annual)])
    fig.show() 
ErrorThreshold= 0.0443
In [19]:
#use these distributions to estimate the 50yr MRI event. 
if True:
    best_fits=[]
    for i in range(len(DIST_NAMES)):
        error = f.df_errors['sumsquare_error'][DIST_NAMES[i]]
        if np.isnan(error) or error<=ErrorThreshold:
            name = DIST_NAMES[i]
            dist = DISTRIBUTIONS[i]
            params = f.fitted_param[name]                
            best_fits.append(name)

    yy = 0.98
    #print(months_per_year,yy,"\n\n")
    print("\t\t50yr MRI Snow Load from Monthly Maximums")
    for fit in best_fits:
        i = DIST_NAMES.index(fit)
        dist = DISTRIBUTIONS[i]
        params = f.fitted_param[fit]
        xx = dist.ppf(0.98, *params)
        if not np.isnan(xx) and not np.isinf(xx):
            print("%20s"%fit,"\t","%.2f" %xx,"psf")
		50yr MRI Snow Load from Monthly Maximums
           betaprime 	 194.15 psf
                chi2 	 173.40 psf
            exponpow 	 182.63 psf
                   f 	 230.43 psf
               ksone 	 96.44 psf
              mielke 	 120.40 psf
            nakagami 	 143.84 psf

These don't appear to fit very well, since the fit is based on the histogram (pdf) and there isn't enough data here to make a decent histogram. So solve for optimized parameters using my own objective function that compares CDF values.

Improve the fit by solving for new distribution parameters

Set up an optimization function based on error between the data and the distribution in terms of CDF. Run scipy optimize to find the best fitting distribution parameters

In [20]:
#THIS TAKES A WHILE...
if True:
    ERROR={}
    NEW_PARAMS={}

    for i in range(len(DIST_NAMES)):
        if DIST_NAMES[i] in f.fitted_param:
            name = DIST_NAMES[i]
            dist = DISTRIBUTIONS[i]
            params = f.fitted_param[name]
            sol = minimize(objfun, params, args=(dist,Pvals_annual,weight_vals_annual))
            error = sol.fun
            newparams = sol.x
            ERROR[name] = error
            NEW_PARAMS[name] = newparams

        #except:
        #    pass
    ERROR = sorted(ERROR.items(), key=operator.itemgetter(1))
    ERROR = collections.OrderedDict(ERROR)
In [21]:
#plot the new fits
showbest=5
showall='legendonly'
if True:
    #ErrorThreshold=1.25*min(ERROR.values())
    ErrorThreshold = np.sort(list(ERROR.values()))[showbest]
    print('ErrorThreshold=',"%.4f" %ErrorThreshold)
    
    fig = go.Figure()
    skips = []
    for i in range(len(DIST_NAMES)):
        #try:
        if True:
            name = DIST_NAMES[i]
            if name not in skips and name in ERROR:
                error = ERROR[name]
                if np.isnan(error) or error>ErrorThreshold:
                    #visible='legendonly'                    
                    visible=showall
                else:
                    visible=True

                dist = DISTRIBUTIONS[i]
                params = NEW_PARAMS[name]
                yy = x=np.linspace(0.001,0.999,98)
                xx = [dist.ppf(i, *params) for i in yy]
                fig.add_trace(go.Scatter(x=xx, y=yy, line=dict(width=1),hoverinfo='name+text',
                                    mode='lines',hovertext="%.4f" % error,visible=visible,name=name+" "+"%1.3g" % error))
    fig.add_trace(go.Scatter(x=sorted(weight_vals_annual), y=Pvals_annual,
                        mode='markers',marker=dict(color='black',size=8),
                        name='data from station: '+SELECTED_STATIONS[0]))
    fig.update_layout(
        title="Fitting Distributions to Annual Maximum Data with Optimization on CDF",
        xaxis_title="Annual Maximum Measured Snow Weight (psf)",
        yaxis_title="Probability",
        template='plotly_white')
    fig.update_xaxes(range=[0, 1.5*max(weight_vals_annual)])
    fig.show() 
ErrorThreshold= 0.0162

Estimate 50yr MRI

In [22]:
#use the best fit distributions to estimate the 50yr MRI event. 
if True:
    best_fits=[]
    for i in range(len(DIST_NAMES)):
        try:
            if DIST_NAMES[i] not in skips:
                #error = f.df_errors['sumsquare_error'][DIST_NAMES[i]]
                error = ERROR[DIST_NAMES[i]]
                #print()
                if not np.isnan(error) and error<ErrorThreshold:
                    #print(DIST_NAMES[i],error)
                    best_fits.append(DIST_NAMES[i])
        except KeyError:
            pass
    #print(best_fits)
    print("50yr MRI Snow Load from Annual Maximums")
    for fit in best_fits:
        i = DIST_NAMES.index(fit)
        dist = DISTRIBUTIONS[i]
        params = NEW_PARAMS[fit]
        yy = 0.98
        xx = dist.ppf(yy, *params)
        print("%20s"%fit,"\t","%.2f" %xx,"psf")
50yr MRI Snow Load from Annual Maximums
                beta 	 161.90 psf
            bradford 	 154.16 psf
          gausshyper 	 122.18 psf
            gengamma 	 215.54 psf
              kappa4 	 124.69 psf

FIT A DISTRIBUTION (MONTHLY DATA)

In [23]:
#repeat with monthly data
if True:
    swe_vals_monthly = sorted(list(MONTHLY_DATA["Amount"]))
    h20 = 5.202288 #psf per inch of depth
    weight_vals_monthly = [i*h20 for i in swe_vals_monthly]
    n=len(weight_vals_monthly)   
In [24]:
#compute fit parameters
if True:
    Pvals_monthly = []
    X = []
    n = len(weight_vals_monthly)
    for i in range(len(sorted(weight_vals_monthly))):
        Pvals_monthly.append(i/(n+1))
        X.append(weight_vals_monthly[i])

    ERROR_monthly={}
    NEW_PARAMS_monthly={}

    for i in range(len(DIST_NAMES)):
        try:
            name = DIST_NAMES[i]
            dist = DISTRIBUTIONS[i]
            params = f.fitted_param[name]      

            sol = minimize(objfun, params, args=(dist,Pvals_monthly,weight_vals_monthly))
            error = sol.fun
            newparams = sol.x
            ERROR_monthly[name] = error
            NEW_PARAMS_monthly[name] = newparams

        except:
            pass
    ERROR_monthly = sorted(ERROR_monthly.items(), key=operator.itemgetter(1))
    ERROR_monthly = collections.OrderedDict(ERROR_monthly)
In [25]:
#plot the best fit distributions
if True:
    ErrorThreshold = np.sort(list(ERROR_monthly.values()))[showbest]
    print('ErrorThreshold=',"%.4f" %ErrorThreshold)
    
    fig = go.Figure()
    skips = []
    for i in range(len(DIST_NAMES)):
        name = DIST_NAMES[i]
        if name not in skips and name in ERROR_monthly:
            error = ERROR_monthly[name]
            if np.isnan(error) or error>ErrorThreshold:
                #visible='legendonly'
                visible = showall
            else:
                visible=True

            dist = DISTRIBUTIONS[i]
            params = NEW_PARAMS_monthly[name]
            yy = np.linspace(0.001,0.999,98)
            xx = [dist.ppf(i, *params) for i in yy]
            fig.add_trace(go.Scatter(x=xx, y=yy, line=dict(width=1),hoverinfo='name+text',
                                mode='lines',hovertext="%.4f" % error,visible=visible,name=name+" "+"%1.3g" % error))

    fig.add_trace(go.Scatter(x=sorted(weight_vals_monthly), y=Pvals_monthly,
                        mode='markers',marker=dict(color='black',size=8),
                        name='data from station: GRFM3'))

    fig.update_layout(
        title="Fitting Distributions to Monthly Maximum Data with Optimization on CDF",
        xaxis_title="Monthly Maximum Measured Snow Weight (psf)",
        yaxis_title="Probability",
        template='plotly_white')
    fig.update_xaxes(range=[0, 1.5*max(weight_vals_annual)])
    fig.show() 
ErrorThreshold= 0.0280
In [26]:
#use these distributions to estimate the 50yr MRI event. 
if True:
    best_fits=[]
    for i in range(len(DIST_NAMES)):
        name = DIST_NAMES[i]
        if DIST_NAMES[i] not in skips and name in ERROR_monthly:
            error = ERROR_monthly[name]
            if not np.isnan(error) and error<ErrorThreshold:

                best_fits.append(DIST_NAMES[i])

    #annual
    #  0.98 = 1 - (1/50)

    #monthly (considering 12 months per year)
    #  0.9983 = 1 - (1/(12*50)

    #monthly (considering 2.4 months per year)
    #  0.9917 = 1 - (1/(2.4*50)

    months_per_year = MONTHLY_DATA.shape[0]/(MONTHLY_DATA['year'].max()-MONTHLY_DATA['year'].min()+1)
    yy = 1 - (1/(months_per_year*50))
    #print(months_per_year,yy,"\n\n")
    print("\t\t50yr MRI Snow Load from Monthly Maximums")
    for fit in best_fits:
        i = DIST_NAMES.index(fit)
        dist = DISTRIBUTIONS[i]
        params = NEW_PARAMS_monthly[fit]
        xx = dist.ppf(yy, *params)
        print("%20s"%fit,"\t","%.2f" %xx,"psf")
		50yr MRI Snow Load from Monthly Maximums
                fisk 	 891.52 psf
          genextreme 	 986.32 psf
          halfcauchy 	 1216.06 psf
              kappa3 	 846.54 psf
          loglaplace 	 3146.75 psf

Discussion

In [ ]: